clear all
set more off
cd "/Users/yasenov/Dropbox (IPL)/RefugeeBan/Data"

capture log close
log using "../logs/wraps_clean", replace

**************************
** FILE 1 - 2010 - 2018 **
**************************
import delimited "WRAPS/MX %2D Arrivals by Destination and Nationality.csv", varnames(4) rowrange(5:79816) colrange(3:10) clear

keep nat_definition4 region_name_3 category3 assur_destinationcity1 cases3
rename (nat_definition4 region_name_3 category3 assur_destinationcity1 cases3) ///
	(state year origin city refugees)

sum
tempfile edno
save `edno'

**************************
** FILE 2 - 2000 - 2009 **
**************************
import delimited "WRAPS/MX %2D Arrivals by Destination and Nationality-2.csv", varnames(4) rowrange(5:85316) colrange(3:10) clear

keep nat_definition4 region_name_3 category3 assur_destinationcity1 cases3
rename (nat_definition4 region_name_3 category3 assur_destinationcity1 cases3) ///
	(state year origin city refugees)

sum
tempfile dve
save `dve'

************
** MERGE  ** 
************
clear
use `edno'
append using `dve'

destring year, replace ignore("CY ")
destring refugees, replace ignore(",")

sum
desc

drop if state == "Puerto Rico" | state == "Guam"

merge m:1 state using "CityMapping/state_dict.dta", keepusing(state_abbr)
assert _merge == 3
drop _merge

replace city = lower(city)
replace state_abbr = lower(state_abbr)
rename state_abbr stateabbreviation

merge m:1 stateabbreviation city using "CityMapping/crosswalk_wraps_google_opencage_censusfips.dta", gen(_m) keepusing(state_fips county_fips) keep(1 3)
keep if _m == 3
drop _m

rename stateabbreviation state_abbr

******************
* CLEAN AND SAVE * 
******************

// create fips variable
tostring state_fips, replace
tostring county_fips, replace

replace state_fips = "0" + state_fips if strlen(state_fips) == 1
replace county_fips = "0" + county_fips if strlen(county_fips) == 2
replace county_fips = "00" + county_fips if strlen(county_fips) == 1

assert strlen(state_fips) == 2
assert strlen(county_fips) == 3

gen fips = state_fips + county_fips

* clean up
order year state* county fips city origin refugee
sort year state county city origin

tab year, m

label var year ""
label var state ""
label var origin "country of origin"
label var refugee "refugee count"
label var fips "5 digit fips code"

isid year state city origin

collapse (sum) refugees, by(fips year)
sum

tab year, m

save "WRAPS/wraps_fips.dta", replace

log close
